import requests
from lxml import etree
import os

# Work out of the output directory (hard-coded Windows path kept from the original).
os.chdir(r"E:\作业")

# Present a desktop-browser User-Agent so Douban does not reject the requests.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}

# Base URL of the Douban Top 250 listing; a page offset is appended per request.
url = 'https://movie.douban.com/top250?start='

# Open the output file with an explicit UTF-8 encoding: without it, the Windows
# locale codec (e.g. gbk) can raise UnicodeEncodeError on the scraped text,
# which is the failure the try/except around the writes was working around.
f = open("movie.txt", "w", encoding="utf-8")


def page_list(url):
    """Fetch one Top-250 listing page and return the movie detail-page URLs.

    Args:
        url: Full listing-page URL (base url + start offset + filter suffix).

    Returns:
        list[str]: the href of each movie's detail link on the page.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
    """
    resp = requests.get(url, headers=header)
    # Fail loudly on an error response instead of silently parsing an
    # error page into an empty result list.
    resp.raise_for_status()
    html = etree.HTML(resp.text)
    return html.xpath("//ol[@class='grid_view']//li//div[@class='pic']/a/@href")


# Fetch the detail-page content and save it to the output file
def getPageDetail(detail_rul):
    """Fetch one movie's detail page and append its title and info block to the file.

    Args:
        detail_rul: URL of the movie detail page (parameter name kept from the
            original for compatibility).

    Side effects:
        Appends text to the module-level file object ``f``; on a write failure
        the info text is printed to stdout instead.
    """
    resp_d = requests.get(detail_rul, headers=header)
    html_d = etree.HTML(resp_d.text)

    # Join all text fragments under <h1> into one title string
    # (join is linear, unlike repeated += concatenation).
    tContent = "".join(html_d.xpath("//h1//text()"))
    # All text inside the "info" panel (director, cast, year, ...).
    all_content = "".join(html_d.xpath("//div[@id='info']//text()"))

    # Best-effort write: if the file cannot accept the text (e.g. an encoding
    # error under a narrow locale codec, or an I/O failure), fall back to
    # printing the info instead of aborting the whole crawl. The original
    # bare `except:` swallowed every exception; narrow it to the cases the
    # fallback is actually meant for.
    try:
        f.write(tContent)
        f.write(all_content)
    except (UnicodeEncodeError, OSError):
        print(all_content)


def main():
    """Crawl every listing page of the Top 250 and scrape each movie's detail page."""
    # The listing is paginated 25 entries at a time: start = 0, 25, ..., 225.
    for offset in range(0, 250, 25):
        listing_url = "{}{}&filter=".format(url, offset)
        for detail_url in page_list(listing_url):
            getPageDetail(detail_url)



# Run the crawl only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
